// 到这个网址可以在线编码，实操transformer.js，并且相关模型已经准备好：https://chn.ai/embedding.html
// 视频讲解 https://www.bilibili.com/video/BV1ZD421j7os/
// 文字讲解：https://blog.csdn.net/fribbler/article/details/136728513



import { AutoModel, AutoTokenizer, dot } from 'https://res.chn.ai/module/transformers@2.15.1/transformers@2.15.1.js'

// The tokenizer and the model are loaded from the same model id. The two
// loads do not depend on each other, so start both and wait for them together
// rather than awaiting one after the other.
const MODEL_ID = 'Xenova/jina-embeddings-v2-base-zh'
const [tokenizer, model] = await Promise.all([
    AutoTokenizer.from_pretrained(MODEL_ID),
    AutoModel.from_pretrained(MODEL_ID),
])

/**
 * Serialize a value to JSON indented with two spaces, converting BigInt
 * values to their decimal string form.
 *
 * Plain `JSON.stringify` throws a TypeError when it meets a BigInt, so a
 * replacer is used to turn every BigInt into a string first.
 *
 * @param {*} obj - The value to serialize.
 * @returns {string} The pretty-printed JSON text.
 */
function stringify(obj) {
    const bigintToString = (_key, value) =>
        typeof value === 'bigint' ? value.toString() : value

    // JSON.stringify's signature is (value, replacer, space): the indent width
    // must be the third argument. Any fourth argument is silently ignored.
    return JSON.stringify(obj, bigintToString, 2)
}


// Tokenize three sample words as a single batch, with the `padding` option enabled.
const tokens = await tokenizer(['鲜花', '牛粪', '白云'], { padding: true })
// Copy the ids out of the `input_ids` tensor's backing buffer into a plain array
// (presumably the flat buffer holding the ids of all three inputs — confirm).
const tokenIds = Array.from(tokens.input_ids.data)
// Decode the ids back to text and split on spaces to list the individual pieces.
// No `await` here: `.split` is called directly on decode()'s return value, so
// that value is already a string rather than a promise.
const words = tokenizer.decode(tokenIds).split(' ')
console.log(words)


// Run the model on the tokenized batch and print its output as JSON.
const embeddings = await model(tokens)
console.log(stringify(embeddings))

